--- /dev/null
+From: Carbo Kuo <byvoid@byvoid.com>
+Date: Thu, 25 Feb 2021 21:13:38 +0900
+Subject: Fix a severe performance bug in `Conversion::Convert` that caused
+ O(N^2) complexity.
+
+In `Conversion.cpp`, line 27:
+```
+ Optional<const DictEntry*> matched = dict->MatchPrefix(pstr);
+```
+`pstr` is a `const char*`. However, `MatchPrefix` has no overload that takes a single `const char*` parameter.
+Therefore the call resolves to `Optional<const DictEntry*> MatchPrefix(const std::string& word) const`.
+This implicitly converts the `const char*` to a `std::string`, which costs O(N) on every call and makes the whole conversion O(N^2).
+
+I added new benchmark tests. Before the fix:
+
+1: ------------------------------------------------------------------
+1: Benchmark Time CPU Iterations
+1: ------------------------------------------------------------------
+1: BM_Initialization/hk2s 1.17 ms 1.12 ms 645
+1: BM_Initialization/hk2t 0.116 ms 0.116 ms 5922
+1: BM_Initialization/jp2t 0.206 ms 0.201 ms 3500
+1: BM_Initialization/s2hk 18.2 ms 17.9 ms 40
+1: BM_Initialization/s2t 18.2 ms 18.1 ms 39
+1: BM_Initialization/s2tw 17.9 ms 17.8 ms 39
+1: BM_Initialization/s2twp 18.6 ms 18.4 ms 39
+1: BM_Initialization/t2hk 0.055 ms 0.054 ms 12907
+1: BM_Initialization/t2jp 0.120 ms 0.117 ms 5978
+1: BM_Initialization/t2s 0.988 ms 0.984 ms 710
+1: BM_Initialization/tw2s 1.08 ms 1.05 ms 672
+1: BM_Initialization/tw2sp 1.26 ms 1.24 ms 563
+1: BM_Initialization/tw2t 0.088 ms 0.083 ms 8528
+1: BM_Convert2M 413 ms 413 ms 2
+1: BM_Convert/100 1.09 ms 1.09 ms 629
+1: BM_Convert/1000 33.2 ms 33.2 ms 21
+1: BM_Convert/10000 2822 ms 2822 ms 1
+1: BM_Convert/100000 (took longer than 5 minutes, killed)
+
+Now:
+1: ------------------------------------------------------------------
+1: Benchmark Time CPU Iterations
+1: ------------------------------------------------------------------
+1: BM_Initialization/hk2s 1.07 ms 1.07 ms 650
+1: BM_Initialization/hk2t 0.114 ms 0.114 ms 6092
+1: BM_Initialization/jp2t 0.204 ms 0.200 ms 3503
+1: BM_Initialization/s2hk 18.2 ms 18.0 ms 40
+1: BM_Initialization/s2t 17.6 ms 17.6 ms 39
+1: BM_Initialization/s2tw 18.0 ms 17.9 ms 40
+1: BM_Initialization/s2twp 17.9 ms 17.9 ms 39
+1: BM_Initialization/t2hk 0.055 ms 0.055 ms 12855
+1: BM_Initialization/t2jp 0.125 ms 0.124 ms 5772
+1: BM_Initialization/t2s 1.000 ms 0.989 ms 695
+1: BM_Initialization/tw2s 1.09 ms 1.07 ms 668
+1: BM_Initialization/tw2sp 1.29 ms 1.26 ms 558
+1: BM_Initialization/tw2t 0.082 ms 0.082 ms 8558
+1: BM_Convert2M 361 ms 361 ms 2
+1: BM_Convert/100 0.741 ms 0.740 ms 948
+1: BM_Convert/1000 7.54 ms 7.52 ms 94
+1: BM_Convert/10000 76.3 ms 76.3 ms 9
+1: BM_Convert/100000 786 ms 786 ms 1
+
+This bug was reported in https://github.com/BYVoid/OpenCC/issues/478 and https://github.com/BYVoid/OpenCC/issues/517.
+
+Applied-Upstream: https://github.com/BYVoid/OpenCC/commit/c2e548e5e95c9a8ccc5c1e5feb259e8885ef32c6
+---
+ src/Dict.hpp | 7 ++++
+ src/benchmark/Performance.cpp | 77 ++++++++++++++++++++++++++++++++++---------
+ 2 files changed, 69 insertions(+), 15 deletions(-)
+
+diff --git a/src/Dict.hpp b/src/Dict.hpp
+index 461d6d2..1c81034 100644
+--- a/src/Dict.hpp
++++ b/src/Dict.hpp
+@@ -49,6 +49,13 @@ public:
+ virtual Optional<const DictEntry*> MatchPrefix(const char* word,
+ size_t len) const;
+
++ /**
++ * Matches the longest prefix of a `const char*` word (len = KeyMaxLength()).
++ */
++ Optional<const DictEntry*> MatchPrefix(const char* word) const {
++ return MatchPrefix(word, KeyMaxLength());
++ }
++
+ /**
+ * Matches the longest matched prefix of a word.
+ */
+diff --git a/src/benchmark/Performance.cpp b/src/benchmark/Performance.cpp
+index cf8d3aa..d1b6468 100644
+--- a/src/benchmark/Performance.cpp
++++ b/src/benchmark/Performance.cpp
+@@ -1,7 +1,26 @@
++/*
++ * Open Chinese Convert
++ *
++ * Copyright 2020-2021 Carbo Kuo <byvoid@byvoid.com>
++ *
++ * Licensed under the Apache License, Version 2.0 (the "License");
++ * you may not use this file except in compliance with the License.
++ * You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing, software
++ * distributed under the License is distributed on an "AS IS" BASIS,
++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++ * See the License for the specific language governing permissions and
++ * limitations under the License.
++ */
++
+ #include <benchmark/benchmark.h>
+ #include <fstream>
+ #include <iostream>
+ #include <memory>
++#include <sstream>
+ #include <streambuf>
+
+ #ifdef _MSC_VER
+@@ -44,21 +63,32 @@ static void BM_Initialization(benchmark::State& state,
+ state.ResumeTiming();
+ }
+ }
+-BENCHMARK_CAPTURE(BM_Initialization, hk2s, "hk2s");
+-BENCHMARK_CAPTURE(BM_Initialization, hk2t, "hk2t");
+-BENCHMARK_CAPTURE(BM_Initialization, jp2t, "jp2t");
+-BENCHMARK_CAPTURE(BM_Initialization, s2hk, "s2hk");
+-BENCHMARK_CAPTURE(BM_Initialization, s2t, "s2t");
+-BENCHMARK_CAPTURE(BM_Initialization, s2tw, "s2tw");
+-BENCHMARK_CAPTURE(BM_Initialization, s2twp, "s2twp");
+-BENCHMARK_CAPTURE(BM_Initialization, t2hk, "t2hk");
+-BENCHMARK_CAPTURE(BM_Initialization, t2jp, "t2jp");
+-BENCHMARK_CAPTURE(BM_Initialization, t2s, "t2s");
+-BENCHMARK_CAPTURE(BM_Initialization, tw2s, "tw2s");
+-BENCHMARK_CAPTURE(BM_Initialization, tw2sp, "tw2sp");
+-BENCHMARK_CAPTURE(BM_Initialization, tw2t, "tw2t");
++BENCHMARK_CAPTURE(BM_Initialization, hk2s, "hk2s")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, hk2t, "hk2t")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, jp2t, "jp2t")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, s2hk, "s2hk")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, s2t, "s2t")->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, s2tw, "s2tw")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, s2twp, "s2twp")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, t2hk, "t2hk")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, t2jp, "t2jp")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, t2s, "t2s")->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, tw2s, "tw2s")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, tw2sp, "tw2sp")
++ ->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Initialization, tw2t, "tw2t")
++ ->Unit(benchmark::kMillisecond);
+
+-static void BM_Convert(benchmark::State& state) {
++static void BM_Convert2M(benchmark::State& state) {
+ const std::string config_name = "s2t";
+ const std::string text = ReadText("zuozhuan.txt");
+ const std::unique_ptr<SimpleConverter> converter(Initialize(config_name));
+@@ -66,7 +96,24 @@ static void BM_Convert(benchmark::State& state) {
+ Convert(converter.get(), text);
+ }
+ }
+-BENCHMARK(BM_Convert)->Unit(benchmark::kMillisecond);
++BENCHMARK(BM_Convert2M)->Unit(benchmark::kMillisecond);
++
+static void BM_Convert(benchmark::State& state, int iteration) {
+ std::ostringstream os;
+ for (int i = 0; i < iteration; i++) { // `iteration` = number of input lines
+ os << "Open Chinese Convert 開放中文轉換" << i << std::endl;
+ }
+ const std::string text = os.str();
+ const std::string config_name = "s2t";
+ const std::unique_ptr<SimpleConverter> converter(Initialize(config_name));
+ for (auto _ : state) { // input text and converter are built before this loop
+ Convert(converter.get(), text);
+ }
+}
++BENCHMARK_CAPTURE(BM_Convert, 100, 100)->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Convert, 1000, 1000)->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Convert, 10000, 10000)->Unit(benchmark::kMillisecond);
++BENCHMARK_CAPTURE(BM_Convert, 100000, 100000)->Unit(benchmark::kMillisecond);
+
+ } // namespace opencc
+